import re

import pandas as pd

# Load the exported marketing back-office log. A forward slash is used as the
# separator so the relative path resolves on both Windows and POSIX systems.
pd_read_csv = pd.read_csv("file/2024_04_01_marketing_bg_log.csv", sep=",")

# Maps each phone number (kept as the digit string captured from the log) to
# the number of log entries in which it was found.
log_dict = {}

# Each `content` cell is expected to hold a log message such as:
#   [6bdc159b-60c1-4dda-b9f6-efa7b6773812] 批量生成员工券，入参：BatchCreateStaffCouponRpcReq(
#   batchCreateStaffCouponDTOS=[BatchCreateStaffCouponDTO(templateId=39, userPhone=13797038082, gainType=1,
#   couponCount=50)], batchCode=2024-04-01 09-41-17)
# Compiled once, outside the loop, because it is applied to every row.
_USER_PHONE_RE = re.compile(r'userPhone=(\d+)')

# Only the `content` column is needed, so iterate it directly instead of
# materialising a full Series per row with iterrows().
for content_ in pd_read_csv['content']:
    # Empty cells are loaded by pandas as float NaN; handing those to the regex
    # engine would raise TypeError, so anything that is not text is skipped.
    if not isinstance(content_, str):
        continue
    # NOTE(review): only the first userPhone in an entry is counted. If one
    # entry can carry several BatchCreateStaffCouponDTO items, confirm whether
    # the remaining phone numbers should be counted as well.
    phone_match = _USER_PHONE_RE.search(content_)
    if phone_match is None:
        continue
    user_phone = phone_match.group(1)
    log_dict[user_phone] = log_dict.get(user_phone, 0) + 1

# Report every phone number that was seen in more than one log entry.
for phone, count in log_dict.items():
    if count >= 2:
        print(f"重复的数据：{phone}, 次数：{count}")
